Visualizing spatio-temporal data

install & load packages

library(robis)
library(sf)
library(dplyr)
library(ggplot2)
library(raster)
library(sf)
library(spacetime)
library(sp)
library(mapview)

# if needed
# install.packages(c("robis","sf","dplyr","ggplot2","raster","sf","spacetime","sp"))
# install_github("andrewzm/STRbook")

Gathering occurrences from OBIS

Explore OBIS database and get species occurrences for Halibut (Hippoglossus hippoglossus).

halibut <- robis::occurrence(scientificname = "Hippoglossus hippoglossus", 
               geometry="POLYGON((-64.84626027288095 48.91641792598304,-64.01129933538095 50.49356505646415,-68.82331105413095 50.11464395818477,-71.04254933538095 46.80876488134784,-64.84626027288095 48.91641792598304))"
          )
## Retrieved 228 records of approximately 228 (100%)

Convert halibut dataframe to a sf object

sf_halibut <- st_as_sf(halibut, 
                    coords = c("decimalLongitude","decimalLatitude"),
                    crs = 4326)

The OBIS database provide a lot of context on the records.

names(sf_halibut)
##  [1] "rightsHolder"             "country"                 
##  [3] "date_year"                "institutionID"           
##  [5] "scientificNameID"         "year"                    
##  [7] "scientificName"           "dynamicProperties"       
##  [9] "dropped"                  "aphiaID"                 
## [11] "language"                 "phylumid"                
## [13] "familyid"                 "catalogNumber"           
## [15] "basisOfRecord"            "terrestrial"             
## [17] "superclass"               "modified"                
## [19] "maximumDepthInMeters"     "id"                      
## [21] "day"                      "order"                   
## [23] "superclassid"             "dataset_id"              
## [25] "collectionCode"           "speciesid"               
## [27] "date_end"                 "occurrenceID"            
## [29] "license"                  "date_start"              
## [31] "month"                    "genus"                   
## [33] "eventDate"                "brackish"                
## [35] "scientificNameAuthorship" "absence"                 
## [37] "subfamily"                "genusid"                 
## [39] "originalScientificName"   "marine"                  
## [41] "minimumDepthInMeters"     "subphylumid"             
## [43] "subfamilyid"              "institutionCode"         
## [45] "countryCode"              "date_mid"                
## [47] "eventTime"                "class"                   
## [49] "orderid"                  "datasetName"             
## [51] "kingdom"                  "specificEpithet"         
## [53] "classid"                  "phylum"                  
## [55] "species"                  "subphylum"               
## [57] "family"                   "category"                
## [59] "kingdomid"                "node_id"                 
## [61] "individualCount"          "fieldNumber"             
## [63] "type"                     "rights"                  
## [65] "recordedBy"               "datasetID"               
## [67] "locality"                 "taxonRank"               
## [69] "depth"                    "geometry"

We perform a visual check of the occurrences.

mapview(sf_halibut)

Visual representation per year

We can now build a representation of the occurrences per year, but firsts we have to extract the polygon for the area around the estuary.

poly_ca <- raster::getData('GADM', country='CA', level=1)
sf_ca <- st_as_sf(poly_ca)
sf_estuary <- st_crop(sf_ca, st_as_sfc(st_bbox(sf_halibut)))
## although coordinates are longitude/latitude, st_intersection assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant
## throughout all geometries

Following, we can visualize the data

ggplot() + 
     geom_sf(data = sf_estuary, fill = "grey50", col = "white") + 
     geom_sf(data = filter(sf_halibut, year > 2006), aes(col = depth)) + 
     facet_wrap(~year)

Count observations per year with rasterize

If you have a large dataset with a high density of points, it is a good idea to summarize the informations using a grid of abundance counts.

# get number of years
years <- unique(sf_halibut$year)

# Create a reference grid
ref_grid <- raster(sf_halibut, res=0.1)

# Open empty stack
stack_count <- stack()

for(i in 1:length(years)){
     # Filter for one year (years[i])
     sf_halibut_yr <- filter(sf_halibut, year == years[i])
     
     # Prepare the geometry
     sp_halibut_yr <- as(st_geometry(sf_halibut_yr), "Spatial")

     # Count points per cell
     rs_count <- rasterize(sp_halibut_yr, ref_grid, fun = 'count')

     # Add the layer to the stack
     stack_count <- addLayer(stack_count,rs_count)
}

names(stack_count) <- years
mapview(stack_count[["X2011"]])

Compute mean values with rasterize

We load the air data PM10 air quality data.

raw_air <- readRDS("./assets/data/air.rds") 
# Transforming data to sf
sf_air <- st_as_sf(raw_air, coords=c("lng", "lat"), crs = 4326)

We select the variable that we want to compute the mean.

sf_air_pm10 <- sf_air[,"pm10"]

Then, we create the reference grid and perform the rasterize on the points data.

ref_rs <- raster(sf_air_pm10, res = 0.5)
rs_mean <- rasterize(sf_air_pm10, ref_rs, fun = mean)
mapview(rs_mean[["pm10"]])

Practice

  1. Collect the occurences of your favorite species on OBIS.
    • robis::occurrence(scientificname = "Hippoglossus hippoglossus")
  2. Visualize the distribution of this species across several years using ggplot2.
  3. Produce count grids per year for this species.

Spatial variability

Hovmöller plot

data("NOAA_df_1990", package = "STRbook")

Tmax <- filter(NOAA_df_1990,     # subset the data
              proc == "Tmax" &   # only max temperature
              month %in% 5:9 &   # May to September
              year == 1993)      # year of 1993

lim_lat <- range(Tmax$lat)        # latitude range

lat_axis <- seq(lim_lat[1],       # latitude axis
                lim_lat[2],
                length=25)

Tmax$t <- Tmax$julian - 728049     # create a new time variable
Tmax_grid <- Tmax
dists <- abs(outer(Tmax$lat, lat_axis, "-"))
Tmax_grid$lat <- lat_axis[apply(dists, 1, which.min)]

Tmax_lat_Hov <- group_by(Tmax_grid, lat, t) %>%
                summarise(z = mean(z))

library(STRbook)
Hovmoller_lat <- ggplot(Tmax_lat_Hov) +            # take data
        geom_tile(aes(x = lat, y = t, fill = z)) + # plot
        fill_scale(name = "degF") +     # add color scale
        scale_y_reverse() +             # rev y scale
        ylab("Day number (days)") +     # add y label
        xlab("Latitude (degrees)") +    # add x label
        theme_bw()                      # change theme

Hovmoller_lat

Temporal variability

Trends plots by stations

data("NOAA_df_1990", package = "STRbook")

Tmax <- filter(NOAA_df_1990,     # subset the data
              proc == "Tmax" &   # only max temperature
              month %in% 5:9 &   # May to September
              year == 1993)      # year of 1993

Tmax_av <- group_by(Tmax, date) %>%
           summarise(meanTmax = mean(z))

gTmaxav <-
    ggplot() +
    geom_line(data = Tmax,aes(x = date, y = z, group = id),
              colour = "blue", alpha = 0.04) +
    geom_line(data = Tmax_av, aes(x = date, y = meanTmax)) +
    xlab("Month") + ylab("Maximum temperature (degF)") +
    theme_bw()

gTmaxav

Transform your data into spacetime classes

To perform spatio-temporal variograms with gstat, we need to convert our data into one of the following different spacetime classes of object. To build these objects, the best way is to use the stConstruct function in the spacetime package.

Get access to the data

Download PM10 air quality data

spacetime classes

  • full grid (STF), a combination of any sp object and any xts object to represent all possible locations on the implied space-time lattice;
  • sparse grid (STS), as STF, but contains only the non-missing space-time combinations on a space-time lattice;
  • irregular (STI), an irregular space-time data structure, where each point is allocated a spatial coordinate and a time stamp;
  • simple trajectories (STT), a sequence of space-time points that form trajectories.
library(spacetime)

raw_air <- readRDS("./assets/data/air.rds")
head(raw_air)
##   station_id   pm10      dates      lng      lat
## 1    DEBB051  8.688 2001-06-08 14.05746 52.54136
## 2    DEBB051  7.354 2001-09-10 14.05746 52.54136
## 3    DEBB051 15.729 2001-10-10 14.05746 52.54136
## 4    DEBB051 26.312 2001-05-03 14.05746 52.54136
## 5    DEBB051 13.958 2001-10-28 14.05746 52.54136
## 6    DEBB051 39.188 2001-10-16 14.05746 52.54136
spaceTime_air <- stConstruct(raw_air, c("lng","lat"),"dates", crs = CRS("+init=epsg:4326"))
class(spaceTime_air)
## [1] "STIDF"
## attr(,"package")
## [1] "spacetime"
# subset 
raw_air_062005 <- subset(raw_air, dates > as.Date("2005-06-01") & dates < as.Date("2005-06-30"))

raw_air_062005 <- stConstruct(raw_air_062005, c("lng","lat"),"dates", crs = CRS("+init=epsg:4326"))

# Coerce the STI class to STF
STFDF_air_062005 <- as(raw_air_062005, "STFDF")
summary(STFDF_air_062005)
## Object of class STFDF
##  with Dimensions (s, t, attr): (46, 28, 2)
## [[Spatial:]]
## Object of class SpatialPoints
## Coordinates:
##          min      max
## lng  6.28107 14.78617
## lat 47.80847 54.92497
## Is projected: FALSE 
## proj4string :
## [+init=epsg:4326 +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84
## +towgs84=0,0,0]
## Number of points: 46
## [[Temporal:]]
##      Index              timeIndex    
##  Min.   :2005-06-02   Min.   : 1.00  
##  1st Qu.:2005-06-08   1st Qu.: 7.75  
##  Median :2005-06-15   Median :14.50  
##  Mean   :2005-06-15   Mean   :14.50  
##  3rd Qu.:2005-06-22   3rd Qu.:21.25  
##  Max.   :2005-06-29   Max.   :28.00  
## [[Data attributes:]]
##    station_id        pm10       
##  DESH001:  28   Min.   : 2.708  
##  DENI063:  28   1st Qu.:10.739  
##  DEUB038:  28   Median :14.191  
##  DEBE056:  28   Mean   :15.100  
##  DEBE032:  28   3rd Qu.:18.583  
##  (Other):1108   Max.   :53.292  
##  NA's   :  40   NA's   :40
# Hovmöller plot
stplot(STFDF_air_062005, mode = "xt")

# Temporal-space variability
stplot(STFDF_air_062005[,,"pm10"], mode = "xy")